/*
 * Copyright (c) 2011, Vicent Marti
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "buffer.h"
#include "autolink.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#if defined(_WIN32)
#define strncasecmp _strnicmp
#endif

/*
 * Check whether a link starts with one of the whitelisted URI prefixes.
 *
 * link     - bytes of the link; only the first `link_len` bytes are examined
 * link_len - number of bytes in `link`
 *
 * Returns 1 when `link` begins (ignoring case) with "/", "http://",
 * "https://", "ftp://" or "mailto:" AND the byte immediately following the
 * prefix is alphanumeric; returns 0 otherwise.
 */
int sd_autolink_issafe( const uint8_t* link, size_t link_len )
{
    static const char* const valid_uris[] =
    {
        "/", "http://", "https://", "ftp://", "mailto:"
    };

    /* derived from the table so the count cannot drift out of sync */
    static const size_t valid_uris_count =
        sizeof( valid_uris ) / sizeof( valid_uris[0] );

    size_t i;

    for( i = 0; i < valid_uris_count; ++i )
    {
        size_t len = strlen( valid_uris[i] );

        /* `link_len > len` guarantees that link[len] is inside the buffer */
        if( link_len > len
            && strncasecmp( (const char*) link, valid_uris[i], len ) == 0
            && isalnum( link[len] ) )
            return 1;
    }

    return 0;
}


/*
 * Trim trailing bytes that most likely do not belong to a candidate link.
 *
 * data       - first byte of the candidate link
 * link_end   - number of bytes of `data` currently considered to be the link
 * max_rewind - unused
 * size       - unused
 *
 * Steps, in order:
 *   1. the link is cut at the first '<';
 *   2. trailing '?', '!', '.', ',' are dropped, as are trailing "&name;"
 *      sequences (name = one or more letters) and stray ';';
 *   3. if the last remaining byte is a closing quote/bracket that is not
 *      balanced inside the link, that single byte is dropped.
 *
 * Returns the trimmed length, or 0 if nothing is left.
 */
static size_t autolink_delim( uint8_t* data, size_t link_end, size_t max_rewind, size_t size )
{
    uint8_t cclose, copen = 0;
    size_t  i;

    (void) max_rewind;
    (void) size;

    for( i = 0; i < link_end; ++i )
        if( data[i] == '<' )
        {
            link_end = i;
            break;
        }

    while( link_end > 0 )
    {
        /* note: strchr() also matches the string terminator, so a trailing
         * 0 byte is dropped here together with the punctuation */
        if( strchr( "?!.,", data[link_end - 1] ) != NULL )
            link_end--;

        else if( data[link_end - 1] == ';' )
        {
            size_t new_end;

            /* a lone ';' cannot close an entity; without this guard
             * `link_end - 2` would wrap around and index out of bounds */
            if( link_end < 2 )
            {
                link_end--;
                continue;
            }

            new_end = link_end - 2;

            /* walk back over the letters of a possible "&name;" */
            while( new_end > 0 && isalpha( data[new_end] ) )
                new_end--;

            /* at least one letter was skipped and an '&' precedes them:
             * drop the whole entity, otherwise drop only the ';' */
            if( new_end < link_end - 2 && data[new_end] == '&' )
                link_end = new_end;
            else
                link_end--;
        }
        else
            break;
    }

    if( link_end == 0 )
        return 0;

    cclose = data[link_end - 1];

    switch( cclose )
    {
    case '"':
        copen = '"'; break;

    case '\'':
        copen = '\''; break;

    case ')':
        copen = '('; break;

    case ']':
        copen = '['; break;

    case '}':
        copen = '{'; break;

    default:
        break;
    }

    if( copen != 0 )
    {
        size_t closing = 0;
        size_t opening = 0;

        /* Count the opening and closing signs inside data[0..link_end).
         * Only the bytes handed to this function are inspected, so an
         * opening sign located before `data` is never seen.
         *
         * If the counts match, the final sign is balanced within the
         * link and is kept; otherwise that one byte is dropped.
         *
         * Examples:
         *
         *	www.pokemon.com/Pikachu_(Electric)
         *		=> kept as is (1 opening, 1 closing)
         *
         *	www.pokemon.com/Pikachu_(Electric))
         *		=> www.pokemon.com/Pikachu_(Electric)
         *
         * For quotes the opening and closing sign are the same byte, so
         * every occurrence is counted as "opening" and a trailing quote
         * is always dropped.
         */
        for( i = 0; i < link_end; ++i )
        {
            if( data[i] == copen )
                opening++;
            else if( data[i] == cclose )
                closing++;
        }

        if( closing != opening )
            link_end--;
    }

    return link_end;
}


/*
 * Measure the run of domain-like characters at the start of `data`.
 *
 * data        - bytes to inspect
 * size        - number of bytes available in `data`
 * allow_short - non-zero to accept a name without any dot
 *
 * The first byte must be alphanumeric. Following bytes may be
 * alphanumeric, '-' or '.'; the scan stops at the first other byte and
 * never looks at the last byte of the buffer (index size - 1).
 *
 * Returns the index at which the scan stopped, or 0 when the input is
 * empty, the first byte is not alphanumeric, or no dot was seen while
 * `allow_short` is zero.
 */
static size_t check_domain( uint8_t* data, size_t size, int allow_short )
{
    size_t i, np = 0;

    /* nothing to look at; also keeps `size - 1` below from wrapping */
    if( size == 0 )
        return 0;

    if( !isalnum( data[0] ) )
        return 0;

    for( i = 1; i < size - 1; ++i )
    {
        if( data[i] == '.' )
            np++;
        else if( !isalnum( data[i] ) && data[i] != '-' )
            break;
    }

    if( allow_short )
    {
        /* We don't need a valid domain in the strict sense (with at
         * least one dot); so just make sure it's composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    }
    else
    {
        /* a valid domain needs to have at least a dot.
         * that's as far as we get */
        return np ? i : 0;
    }
}


/*
 * Try to recognise a link that starts with "www." at `data`.
 *
 * rewind_p   - set to 0 on success (no bytes before `data` are part of
 *              the link)
 * link       - buffer that receives the link bytes through bufput()
 * data       - candidate text; data[-1] is read when max_rewind > 0
 * max_rewind - presumably the number of bytes available before `data`;
 *              here only "zero or not" matters
 * size       - number of bytes available in `data`
 * flags      - unused
 *
 * Returns the number of bytes of `data` that make up the link, or 0 when
 * no link was recognised (in which case `link` and `rewind_p` are left
 * untouched).
 */
size_t sd_autolink__www( size_t* rewind_p, struct buf* link, uint8_t* data,
                         size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end;

    (void) flags;

    /* the byte in front of "www." must be punctuation or whitespace */
    if( max_rewind > 0 && !ispunct( data[-1] ) && !isspace( data[-1] ) )
        return 0;

    if( size < 4 || memcmp( data, "www.", strlen( "www." ) ) != 0 )
        return 0;

    link_end = check_domain( data, size, 0 );

    if( link_end == 0 )
        return 0;

    /* the link extends up to the next whitespace byte */
    while( link_end < size && !isspace( data[link_end] ) )
        link_end++;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data, link_end );
    *rewind_p = 0;

    /* returned as size_t, like the sibling functions; no narrowing to int */
    return link_end;
}


/*
 * Try to recognise an e-mail address around `data`.
 *
 * rewind_p   - on success, receives the number of bytes before `data`
 *              that belong to the address
 * link       - buffer that receives the address bytes through bufput()
 * data       - scan position; presumably points at the '@' sign (this
 *              function does not check data[0] explicitly) -- confirm
 *              against the caller
 * max_rewind - upper bound on how many bytes before `data` may be read
 * size       - number of bytes available in `data`
 * flags      - unused
 *
 * Returns the number of bytes of `data` (from `data` forwards) that belong
 * to the address, or 0 when no address was recognised.
 */
size_t sd_autolink__email( size_t* rewind_p, struct buf* link, uint8_t* data,
                           size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end, rewind;
    int nb = 0, np = 0;

    (void) flags;

    /* walk backwards over the part before `data`:
     * alphanumerics plus '.', '+', '-', '_' */
    for( rewind = 0; rewind < max_rewind; ++rewind )
    {
        /* plain pointer subtraction; indexing with a negated size_t
         * would depend on unsigned wraparound */
        uint8_t c = *( data - rewind - 1 );

        if( isalnum( c ) )
            continue;

        /* strchr() also matches the string terminator, so a 0 byte has
         * to be excluded explicitly */
        if( c != 0 && strchr( ".+-_", c ) != NULL )
            continue;

        break;
    }

    if( rewind == 0 )
        return 0;

    /* walk forwards: count '@' signs and dots (a dot in the very last
     * position of the buffer is not counted); stop at the first byte that
     * is not alphanumeric, '@', '.', '-' or '_' */
    for( link_end = 0; link_end < size; ++link_end )
    {
        uint8_t c = data[link_end];

        if( isalnum( c ) )
            continue;

        if( c == '@' )
            nb++;
        else if( c == '.' && link_end < size - 1 )
            np++;
        else if( c != '-' && c != '_' )
            break;
    }

    /* need exactly one '@', at least one dot, and a letter at the end */
    if( link_end < 2 || nb != 1 || np == 0
        || !isalpha( data[link_end - 1] ) )
        return 0;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data - rewind, link_end + rewind );
    *rewind_p = rewind;

    return link_end;
}


/*
 * Try to recognise a URL of the form "scheme://..." around `data`.
 *
 * rewind_p   - on success, receives the number of scheme letters found
 *              before `data`
 * link       - buffer that receives the URL bytes through bufput()
 * data       - scan position; data[1] and data[2] must both be '/', so
 *              `data` presumably points at the ':' of "://"
 * max_rewind - upper bound on how many bytes before `data` may be read
 * size       - number of bytes available in `data`
 * flags      - when SD_AUTOLINK_SHORT_DOMAINS is set, a host name without
 *              a dot is accepted
 *
 * Returns the number of bytes of `data` (from `data` forwards) that belong
 * to the URL, or 0 when no URL was recognised.
 */
size_t sd_autolink__url( size_t* rewind_p, struct buf* link, uint8_t* data,
                         size_t max_rewind, size_t size, unsigned int flags )
{
    size_t link_end, rewind = 0, domain_len;

    if( size < 4 || data[1] != '/' || data[2] != '/' )
        return 0;

    /* walk backwards over the letters of the scheme; plain pointer
     * subtraction is used because indexing with a negated size_t would
     * depend on unsigned wraparound */
    while( rewind < max_rewind && isalpha( *( data - rewind - 1 ) ) )
        rewind++;

    /* the scheme plus what follows must pass the whitelist check */
    if( !sd_autolink_issafe( data - rewind, size + rewind ) )
        return 0;

    link_end = strlen( "://" );

    domain_len = check_domain(
            data + link_end,
            size - link_end,
            flags & SD_AUTOLINK_SHORT_DOMAINS );

    if( domain_len == 0 )
        return 0;

    link_end += domain_len;

    /* the URL extends up to the next whitespace byte */
    while( link_end < size && !isspace( data[link_end] ) )
        link_end++;

    link_end = autolink_delim( data, link_end, max_rewind, size );

    if( link_end == 0 )
        return 0;

    bufput( link, data - rewind, link_end + rewind );
    *rewind_p = rewind;

    return link_end;
}
